package com.shujia.flink.state;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
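import org.apache.flink.streaming.api.CheckpointingMode;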
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class Demo5ExactlyOnce {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
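
        // Exactly-once state consistency requires checkpointing to be enabled.
        // Minimal sketch: the 5s checkpoint interval is an assumption, tune it for your job.
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);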

        // Create the Kafka source
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("master:9092,node1:9092,node2:9092") // Kafka broker list
                .setTopics("words") // topic to consume
                .setGroupId("my-group") // consumer group id
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema()) // deserialize record values as plain strings
                .build();
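
        // Note (assumes checkpointing is enabled): the KafkaSource stores its offsets in Flink's
        // checkpoint state, so OffsetsInitializer.earliest() only applies when the job starts
        // without any prior state.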

        // Read from the Kafka source
        DataStream<String> wordsDS = env
                .fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source");


        // Count word occurrences: map each word to a (word, 1) pair
        DataStream<Tuple2<String, Integer>> kvDS = wordsDS
                .map(word -> Tuple2.of(word, 1), Types.TUPLE(Types.STRING, Types.INT));

        // Group by word
        KeyedStream<Tuple2<String, Integer>, String> keyByDS = kvDS.keyBy(kv -> kv.f0);

        // Sum the counts (the field at index 1)
        DataStream<Tuple2<String, Integer>> countDS = keyByDS.sum(1);

        // Print the result
        countDS.print();
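
        // Note: print() is only a debugging sink. For end-to-end exactly-once delivery a
        // transactional sink would be needed, e.g. KafkaSink with
        // DeliveryGuarantee.EXACTLY_ONCE (not shown here).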

        // Launch the Flink job
        env.execute();

    }
}
